package pri.lt.parser;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.Iterator;

public class Util {

    /** Charset name returned when the document declares none. */
    private static final String DEFAULT_CHARSET = "gb2312";

    /**
     * Determines the character set declared by a document's {@code <meta>} tags.
     *
     * <p>Sources are consulted in this order, and the first non-blank value wins:
     * <ol>
     *   <li>the {@code charset} attribute of a
     *       {@code <meta http-equiv="Content-Type">} element;</li>
     *   <li>the {@code charset=} parameter inside the {@code content} attribute of a
     *       {@code <meta http-equiv="Content-Type">} element, e.g.
     *       {@code content="text/html; charset=utf-8"};</li>
     *   <li>the {@code charset} attribute of any {@code <meta charset="...">}
     *       element.</li>
     * </ol>
     *
     * @param doc the parsed document to inspect
     * @return the trimmed charset name as written in the document, or
     *         {@code "gb2312"} when no declaration is found
     * @throws Exception declared for compatibility with existing callers
     */
    public static String getCharset (Document doc) throws Exception{
        Elements contentTypeMetas = doc.select("meta[http-equiv=Content-Type]");

        // Pass 1: a direct charset attribute on the Content-Type meta element.
        // This is checked first, across all matches, so that any document which
        // already yielded a declared charset keeps yielding the same one.
        for (Element meta : contentTypeMetas) {
            String charset = trimToNull(meta.attr("charset"));
            if (charset != null) {
                return charset;
            }
        }

        // Pass 2: the usual form, where the charset is a parameter of the
        // content attribute ("text/html; charset=utf-8").
        for (Element meta : contentTypeMetas) {
            String charset = charsetFromContentType(meta.attr("content"));
            if (charset != null) {
                return charset;
            }
        }

        // Pass 3: the short form <meta charset="...">, which carries no
        // http-equiv attribute and is therefore not matched above.
        for (Element meta : doc.select("meta[charset]")) {
            String charset = trimToNull(meta.attr("charset"));
            if (charset != null) {
                return charset;
            }
        }

        return DEFAULT_CHARSET;
    }

    /**
     * Extracts the value of the {@code charset} parameter from a Content-Type
     * style string such as {@code "text/html; charset=utf-8"}.
     *
     * @param content the raw attribute value; may be {@code null} or empty
     * @return the charset value with surrounding whitespace and matching quotes
     *         removed, or {@code null} when no non-blank charset parameter exists
     */
    private static String charsetFromContentType(String content) {
        if (content == null) {
            return null;
        }
        for (String part : content.split(";")) {
            String param = part.trim();
            int eq = param.indexOf('=');
            if (eq < 0) {
                // Not a key=value parameter (e.g. the "text/html" media type).
                continue;
            }
            if (!"charset".equalsIgnoreCase(param.substring(0, eq).trim())) {
                continue;
            }
            String value = param.substring(eq + 1).trim();
            // Strip one pair of matching quotes: charset="utf-8" or charset='utf-8'.
            if (value.length() >= 2) {
                char first = value.charAt(0);
                char last = value.charAt(value.length() - 1);
                if ((first == '"' || first == '\'') && first == last) {
                    value = value.substring(1, value.length() - 1).trim();
                }
            }
            if (value.length() > 0) {
                return value;
            }
        }
        return null;
    }

    /**
     * Trims a string, mapping {@code null} and blank input to {@code null}.
     *
     * @param s the string to normalize; may be {@code null}
     * @return the trimmed string, or {@code null} if nothing remains
     */
    private static String trimToNull(String s) {
        if (s == null) {
            return null;
        }
        String trimmed = s.trim();
        return trimmed.length() > 0 ? trimmed : null;
    }
}
